-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[NVPTX] Auto-upgrade nvvm.grid_constant to param attribute #155489
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[NVPTX] Auto-upgrade nvvm.grid_constant to param attribute #155489
Conversation
|
@llvm/pr-subscribers-mlir-llvm @llvm/pr-subscribers-clang Author: Alex MacLean (AlexMaclean) ChangesUpgrade the !"grid_constant" !nvvm.annotation to a "nvvm.grid_constant" attribute. This attribute is much simpler for front-ends to apply and faster and simpler to query. Patch is 34.40 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/155489.diff 9 Files Affected:
diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp
index e874617796f86..78790daa1874a 100644
--- a/clang/lib/CodeGen/Targets/NVPTX.cpp
+++ b/clang/lib/CodeGen/Targets/NVPTX.cpp
@@ -87,10 +87,6 @@ class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
int Operand);
- static void
- addGridConstantNVVMMetadata(llvm::GlobalValue *GV,
- const SmallVectorImpl<int> &GridConstantArgs);
-
private:
static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
LValue Src) {
@@ -266,27 +262,24 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
// By default, all functions are device functions
if (FD->hasAttr<DeviceKernelAttr>() || FD->hasAttr<CUDAGlobalAttr>()) {
// OpenCL/CUDA kernel functions get kernel metadata
- // Create !{<func-ref>, metadata !"kernel", i32 1} node
// And kernel functions are not subject to inlining
F->addFnAttr(llvm::Attribute::NoInline);
if (FD->hasAttr<CUDAGlobalAttr>()) {
- SmallVector<int, 10> GCI;
+ F->setCallingConv(llvm::CallingConv::PTX_Kernel);
+
for (auto IV : llvm::enumerate(FD->parameters()))
if (IV.value()->hasAttr<CUDAGridConstantAttr>())
- // For some reason arg indices are 1-based in NVVM
- GCI.push_back(IV.index() + 1);
- // Create !{<func-ref>, metadata !"kernel", i32 1} node
- F->setCallingConv(llvm::CallingConv::PTX_Kernel);
- addGridConstantNVVMMetadata(F, GCI);
+ F->addParamAttr(
+ IV.index(),
+ llvm::Attribute::get(F->getContext(), "nvvm.grid_constant"));
}
if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
M.handleCUDALaunchBoundsAttr(F, Attr);
}
}
// Attach kernel metadata directly if compiling for NVPTX.
- if (FD->hasAttr<DeviceKernelAttr>()) {
+ if (FD->hasAttr<DeviceKernelAttr>())
F->setCallingConv(llvm::CallingConv::PTX_Kernel);
- }
}
void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
@@ -306,29 +299,6 @@ void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
}
-void NVPTXTargetCodeGenInfo::addGridConstantNVVMMetadata(
- llvm::GlobalValue *GV, const SmallVectorImpl<int> &GridConstantArgs) {
-
- llvm::Module *M = GV->getParent();
- llvm::LLVMContext &Ctx = M->getContext();
-
- // Get "nvvm.annotations" metadata node
- llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");
-
- SmallVector<llvm::Metadata *, 5> MDVals = {llvm::ConstantAsMetadata::get(GV)};
- if (!GridConstantArgs.empty()) {
- SmallVector<llvm::Metadata *, 10> GCM;
- for (int I : GridConstantArgs)
- GCM.push_back(llvm::ConstantAsMetadata::get(
- llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), I)));
- MDVals.append({llvm::MDString::get(Ctx, "grid_constant"),
- llvm::MDNode::get(Ctx, GCM)});
- }
-
- // Append metadata to nvvm.annotations
- MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
-}
-
bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
return false;
}
diff --git a/clang/test/CodeGenCUDA/grid-constant.cu b/clang/test/CodeGenCUDA/grid-constant.cu
index e7000cab3cda5..120b854e56746 100644
--- a/clang/test/CodeGenCUDA/grid-constant.cu
+++ b/clang/test/CodeGenCUDA/grid-constant.cu
@@ -19,13 +19,9 @@ void foo() {
tkernel_const<S><<<1,1>>>({});
tkernel<const S><<<1,1>>>(1, {});
}
-//.
-//.
-// CHECK: [[META0:![0-9]+]] = !{ptr @_Z6kernel1Sii, !"grid_constant", [[META1:![0-9]+]]}
-// CHECK: [[META1]] = !{i32 1, i32 3}
-// CHECK: [[META2:![0-9]+]] = !{ptr @_Z13tkernel_constIK1SEvT_, !"grid_constant", [[META3:![0-9]+]]}
-// CHECK: [[META3]] = !{i32 1}
-// CHECK: [[META4:![0-9]+]] = !{ptr @_Z13tkernel_constI1SEvT_, !"grid_constant", [[META3]]}
-// CHECK: [[META5:![0-9]+]] = !{ptr @_Z7tkernelIK1SEviT_, !"grid_constant", [[META6:![0-9]+]]}
-// CHECK: [[META6]] = !{i32 2}
-//.
+
+// CHECK: define dso_local ptx_kernel void @_Z6kernel1Sii(ptr noundef byval(%struct.S) align 1 "nvvm.grid_constant" %gc_arg1, i32 noundef %arg2, i32 noundef "nvvm.grid_constant" %gc_arg3)
+// CHECK: define ptx_kernel void @_Z13tkernel_constIK1SEvT_(ptr noundef byval(%struct.S) align 1 "nvvm.grid_constant" %arg)
+// CHECK: define ptx_kernel void @_Z13tkernel_constI1SEvT_(ptr noundef byval(%struct.S) align 1 "nvvm.grid_constant" %arg)
+// CHECK: define ptx_kernel void @_Z7tkernelIK1SEviT_(i32 noundef %dummy, ptr noundef byval(%struct.S) align 1 "nvvm.grid_constant" %arg)
+
diff --git a/llvm/docs/NVPTXUsage.rst b/llvm/docs/NVPTXUsage.rst
index 629bf2ea5afb4..4c8c605edfdd6 100644
--- a/llvm/docs/NVPTXUsage.rst
+++ b/llvm/docs/NVPTXUsage.rst
@@ -57,6 +57,19 @@ not.
When compiled, the PTX kernel functions are callable by host-side code.
+
+Parameter Attributes
+--------------------
+
+``"nvvm.grid_constant"``
+ This attribute may be attached to a ``byval`` parameter of a kernel function
+ to indicate that the parameter should be lowered as a direct reference to
+ the grid-constant memory of the parameter, as opposed to a copy of the
+ parameter in local memory. Writing to a grid-constant parameter is
+ undefined behavior. Unlike a normal ``byval`` parameter, the address of a
+ grid-constant parameter is not unique to a given function invocation but
+ instead is shared by all kernels in the grid.
+
.. _nvptx_fnattrs:
Function Attributes
@@ -2289,9 +2302,9 @@ The Kernel
; Intrinsic to read X component of thread ID
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() readnone nounwind
- define void @kernel(ptr addrspace(1) %A,
- ptr addrspace(1) %B,
- ptr addrspace(1) %C) {
+ define ptx_kernel void @kernel(ptr addrspace(1) %A,
+ ptr addrspace(1) %B,
+ ptr addrspace(1) %C) {
entry:
; What is my ID?
%id = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x() readnone nounwind
@@ -2314,9 +2327,6 @@ The Kernel
ret void
}
- !nvvm.annotations = !{!0}
- !0 = !{ptr @kernel, !"kernel", i32 1}
-
We can use the LLVM ``llc`` tool to directly run the NVPTX code generator:
@@ -2442,34 +2452,6 @@ and non-generic address spaces.
See :ref:`address_spaces` and :ref:`nvptx_intrinsics` for more information.
-Kernel Metadata
-^^^^^^^^^^^^^^^
-
-In PTX, a function can be either a `kernel` function (callable from the host
-program), or a `device` function (callable only from GPU code). You can think
-of `kernel` functions as entry-points in the GPU program. To mark an LLVM IR
-function as a `kernel` function, we make use of special LLVM metadata. The
-NVPTX back-end will look for a named metadata node called
-``nvvm.annotations``. This named metadata must contain a list of metadata that
-describe the IR. For our purposes, we need to declare a metadata node that
-assigns the "kernel" attribute to the LLVM IR function that should be emitted
-as a PTX `kernel` function. These metadata nodes take the form:
-
-.. code-block:: text
-
- !{<function ref>, metadata !"kernel", i32 1}
-
-For the previous example, we have:
-
-.. code-block:: llvm
-
- !nvvm.annotations = !{!0}
- !0 = !{ptr @kernel, !"kernel", i32 1}
-
-Here, we have a single metadata declaration in ``nvvm.annotations``. This
-metadata annotates our ``@kernel`` function with the ``kernel`` attribute.
-
-
Running the Kernel
------------------
@@ -2669,9 +2651,9 @@ Libdevice provides an ``__nv_powf`` function that we will use.
; libdevice function
declare float @__nv_powf(float, float)
- define void @kernel(ptr addrspace(1) %A,
- ptr addrspace(1) %B,
- ptr addrspace(1) %C) {
+ define ptx_kernel void @kernel(ptr addrspace(1) %A,
+ ptr addrspace(1) %B,
+ ptr addrspace(1) %C) {
entry:
; What is my ID?
%id = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x() readnone nounwind
@@ -2694,9 +2676,6 @@ Libdevice provides an ``__nv_powf`` function that we will use.
ret void
}
- !nvvm.annotations = !{!0}
- !0 = !{ptr @kernel, !"kernel", i32 1}
-
To compile this kernel, we perform the following steps:
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index e200f3626e69d..7ea9c6dff13b8 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -5381,6 +5381,16 @@ bool static upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K,
upgradeNVVMFnVectorAttr("nvvm.cluster_dim", K[0], GV, V);
return true;
}
+ if (K == "grid_constant") {
+ const auto Attr = Attribute::get(GV->getContext(), "nvvm.grid_constant");
+ for (const auto &Op : cast<MDNode>(V)->operands()) {
+ // For some reason, the index is 1-based in the metadata. Good thing we're
+ // able to auto-upgrade it!
+ const auto Index = mdconst::extract<ConstantInt>(Op)->getZExtValue() - 1;
+ cast<Function>(GV)->addParamAttr(Index, Attr);
+ }
+ return true;
+ }
return false;
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
index 274b04fdd30b5..8e97b422218f7 100644
--- a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -55,15 +55,6 @@ void clearAnnotationCache(const Module *Mod) {
AC.Cache.erase(Mod);
}
-static void readIntVecFromMDNode(const MDNode *MetadataNode,
- std::vector<unsigned> &Vec) {
- for (unsigned i = 0, e = MetadataNode->getNumOperands(); i != e; ++i) {
- ConstantInt *Val =
- mdconst::extract<ConstantInt>(MetadataNode->getOperand(i));
- Vec.push_back(Val->getZExtValue());
- }
-}
-
static void cacheAnnotationFromMD(const MDNode *MetadataNode,
key_val_pair_t &retval) {
auto &AC = getAnnotationCache();
@@ -83,19 +74,8 @@ static void cacheAnnotationFromMD(const MDNode *MetadataNode,
if (ConstantInt *Val = mdconst::dyn_extract<ConstantInt>(
MetadataNode->getOperand(i + 1))) {
retval[Key].push_back(Val->getZExtValue());
- } else if (MDNode *VecMd =
- dyn_cast<MDNode>(MetadataNode->getOperand(i + 1))) {
- // note: only "grid_constant" annotations support vector MDNodes.
- // assert: there can only exist one unique key value pair of
- // the form (string key, MDNode node). Operands of such a node
- // shall always be unsigned ints.
- auto [It, Inserted] = retval.try_emplace(Key);
- if (Inserted) {
- readIntVecFromMDNode(VecMd, It->second);
- continue;
- }
} else {
- llvm_unreachable("Value operand not a constant int or an mdnode");
+ llvm_unreachable("Value operand not a constant int");
}
}
}
@@ -179,16 +159,13 @@ static bool globalHasNVVMAnnotation(const Value &V, const std::string &Prop) {
}
static bool argHasNVVMAnnotation(const Value &Val,
- const std::string &Annotation,
- const bool StartArgIndexAtOne = false) {
+ const std::string &Annotation) {
if (const Argument *Arg = dyn_cast<Argument>(&Val)) {
const Function *Func = Arg->getParent();
std::vector<unsigned> Annot;
if (findAllNVVMAnnotation(Func, Annotation, Annot)) {
- const unsigned BaseOffset = StartArgIndexAtOne ? 1 : 0;
- if (is_contained(Annot, BaseOffset + Arg->getArgNo())) {
+ if (is_contained(Annot, Arg->getArgNo()))
return true;
- }
}
}
return false;
@@ -250,8 +227,7 @@ bool isParamGridConstant(const Argument &Arg) {
}
// "grid_constant" counts argument indices starting from 1
- if (argHasNVVMAnnotation(Arg, "grid_constant",
- /*StartArgIndexAtOne*/ true))
+ if (Arg.hasAttribute("nvvm.grid_constant"))
return true;
return false;
diff --git a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
index 8adde4ceefbf4..01ab47145940c 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
@@ -49,14 +49,14 @@ define dso_local noundef i32 @non_kernel_function(ptr nocapture noundef readonly
; PTX-NEXT: st.param.b32 [func_retval0], %r10;
; PTX-NEXT: ret;
entry:
- %a. = select i1 %b, ptr %a, ptr addrspacecast (ptr addrspace(1) @gi to ptr), !dbg !17
- %idx.ext = sext i32 %c to i64, !dbg !18
- %add.ptr = getelementptr inbounds i8, ptr %a., i64 %idx.ext, !dbg !18
- %0 = load i32, ptr %add.ptr, align 1, !dbg !19
- ret i32 %0, !dbg !23
+ %a. = select i1 %b, ptr %a, ptr addrspacecast (ptr addrspace(1) @gi to ptr)
+ %idx.ext = sext i32 %c to i64
+ %add.ptr = getelementptr inbounds i8, ptr %a., i64 %idx.ext
+ %0 = load i32, ptr %add.ptr, align 1
+ ret i32 %0
}
-define ptx_kernel void @grid_const_int(ptr byval(i32) align 4 %input1, i32 %input2, ptr %out, i32 %n) {
+define ptx_kernel void @grid_const_int(ptr byval(i32) align 4 "nvvm.grid_constant" %input1, i32 %input2, ptr %out, i32 %n) {
; PTX-LABEL: grid_const_int(
; PTX: {
; PTX-NEXT: .reg .b32 %r<4>;
@@ -71,7 +71,7 @@ define ptx_kernel void @grid_const_int(ptr byval(i32) align 4 %input1, i32 %inpu
; PTX-NEXT: st.global.b32 [%rd2], %r3;
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_int(
-; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], i32 [[INPUT2:%.*]], ptr [[OUT:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; OPT-SAME: ptr byval(i32) align 4 "nvvm.grid_constant" [[INPUT1:%.*]], i32 [[INPUT2:%.*]], ptr [[OUT:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
; OPT-NEXT: [[INPUT11:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
; OPT-NEXT: [[TMP:%.*]] = load i32, ptr addrspace(101) [[INPUT11]], align 4
; OPT-NEXT: [[ADD:%.*]] = add i32 [[TMP]], [[INPUT2]]
@@ -85,7 +85,7 @@ define ptx_kernel void @grid_const_int(ptr byval(i32) align 4 %input1, i32 %inpu
%struct.s = type { i32, i32 }
-define ptx_kernel void @grid_const_struct(ptr byval(%struct.s) align 4 %input, ptr %out){
+define ptx_kernel void @grid_const_struct(ptr byval(%struct.s) align 4 "nvvm.grid_constant" %input, ptr %out){
; PTX-LABEL: grid_const_struct(
; PTX: {
; PTX-NEXT: .reg .b32 %r<4>;
@@ -100,7 +100,7 @@ define ptx_kernel void @grid_const_struct(ptr byval(%struct.s) align 4 %input, p
; PTX-NEXT: st.global.b32 [%rd2], %r3;
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_struct(
-; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[OUT:%.*]]) #[[ATTR0]] {
+; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 "nvvm.grid_constant" [[INPUT:%.*]], ptr [[OUT:%.*]]) #[[ATTR0]] {
; OPT-NEXT: [[INPUT1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
; OPT-NEXT: [[GEP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 0
; OPT-NEXT: [[GEP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 1
@@ -118,7 +118,7 @@ define ptx_kernel void @grid_const_struct(ptr byval(%struct.s) align 4 %input, p
ret void
}
-define ptx_kernel void @grid_const_escape(ptr byval(%struct.s) align 4 %input) {
+define ptx_kernel void @grid_const_escape(ptr byval(%struct.s) align 4 "nvvm.grid_constant" %input) {
; PTX-LABEL: grid_const_escape(
; PTX: {
; PTX-NEXT: .reg .b64 %rd<4>;
@@ -136,7 +136,7 @@ define ptx_kernel void @grid_const_escape(ptr byval(%struct.s) align 4 %input) {
; PTX-NEXT: } // callseq 0
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_escape(
-; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]]) #[[ATTR0]] {
+; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 "nvvm.grid_constant" [[INPUT:%.*]]) #[[ATTR0]] {
; OPT-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
; OPT-NEXT: [[INPUT_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
; OPT-NEXT: [[CALL:%.*]] = call i32 @escape(ptr [[INPUT_PARAM_GEN]])
@@ -145,7 +145,7 @@ define ptx_kernel void @grid_const_escape(ptr byval(%struct.s) align 4 %input) {
ret void
}
-define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4 %input, i32 %a, ptr byval(i32) align 4 %b) {
+define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4 "nvvm.grid_constant" %input, i32 %a, ptr byval(i32) align 4 "nvvm.grid_constant" %b) {
; PTX-LABEL: multiple_grid_const_escape(
; PTX: {
; PTX-NEXT: .local .align 4 .b8 __local_depot4[4];
@@ -179,7 +179,7 @@ define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4
; PTX-NEXT: } // callseq 1
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @multiple_grid_const_escape(
-; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], i32 [[A:%.*]], ptr byval(i32) align 4 [[B:%.*]]) #[[ATTR0]] {
+; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 "nvvm.grid_constant" [[INPUT:%.*]], i32 [[A:%.*]], ptr byval(i32) align 4 "nvvm.grid_constant" [[B:%.*]]) #[[ATTR0]] {
; OPT-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[B]])
; OPT-NEXT: [[B_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
; OPT-NEXT: [[TMP2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
@@ -194,7 +194,7 @@ define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4
ret void
}
-define ptx_kernel void @grid_const_memory_escape(ptr byval(%struct.s) align 4 %input, ptr %addr) {
+define ptx_kernel void @grid_const_memory_escape(ptr byval(%struct.s) align 4 "nvvm.grid_constant" %input, ptr %addr) {
; PTX-LABEL: grid_const_memory_escape(
; PTX: {
; PTX-NEXT: .reg .b64 %rd<5>;
@@ -207,7 +207,7 @@ define ptx_kernel void @grid_const_memory_escape(ptr byval(%struct.s) align 4 %i
; PTX-NEXT: st.global.b64 [%rd3], %rd4;
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_memory_escape(
-; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[ADDR:%.*]]) #[[ATTR0]] {
+; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 "nvvm.grid_constant" [[INPUT:%.*]], ptr [[ADDR:%.*]]) #[[ATTR0]] {
; OPT-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
; OPT-NEXT: [[INPUT1:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
; OPT-NEXT: store ptr [[INPUT1]], ptr [[ADDR]], align 8
@@ -216,7 +216,7 @@ define ptx_kernel void @grid_const_memory_escape(ptr byval(%struct.s) align 4 %i
ret void
}
-define ptx_kernel void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4 %input, ptr %result) {
+define ptx_kernel void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4 "nvvm.grid_constant" %input, ptr %result) {
; PTX-LABEL: grid_const_inlineasm_escape(
; PTX: {
; PTX-NEXT: .reg .b64 %rd<7>;
@@ -234,7 +234,7 @@ define ptx_kernel void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4
; PTX-NEXT: ret;
; PTX-NOT .local
; OPT-LABEL: define ptx_kernel void @grid_const_inlineasm_escape(
-; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[RESULT:%.*]]) #[[ATTR0]] {
+; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 "nvvm.grid_constant" [[INPUT:%.*]], ptr [[RESULT:%.*]]) #[[ATTR0]] {
; OPT-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
; OPT-NEXT: [[INPUT1:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
; OPT-NEXT: [[TMPPTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 0
@@ -249,7 +249,7 @@ define ptx_kernel void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4
ret void
}
-define ptx_kernel void @grid_const_partial_escape(ptr byval(i32) %input, ptr %output) {
+define ptx_kernel void @grid_const_partial_escape(ptr byval(i32) "nvvm.grid_constant" %input, ptr %output) {
; PTX-LABEL: grid_const_partial_...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
mlir side is trivial and looks fine
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice. LGTM.
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/206/builds/5420 Here is the relevant piece of the build log for the reference |
Upgrade the !"grid_constant" !nvvm.annotation to a "nvvm.grid_constant" attribute. This attribute is much simpler for front-ends to apply and faster and simpler to query.